#Library Imports
import numpy as np
import pandas as pd
import math
import os
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error

#######딥러닝 라이브러리##########
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape, GRU, RNN

tf.keras.backend.set_floatx('float64')
INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2
train=pd.read_csv('train.csv')
test=pd.read_csv('test.csv')
submission=pd.read_csv('sample_submission.csv')
#train.shape 122400 X 10
#60개의 건물 X 85일 24시간 =122400
train
num date_time 전력사용량(kWh) 기온(°C) 풍속(m/s) 습도(%) 강수량(mm) 일조(hr) 비전기냉방설비운영 태양광보유
0 1 2020-06-01 00 8179.056 17.6 2.5 92.0 0.8 0.0 0.0 0.0
1 1 2020-06-01 01 8135.640 17.7 2.9 91.0 0.3 0.0 0.0 0.0
2 1 2020-06-01 02 8107.128 17.5 3.2 91.0 0.0 0.0 0.0 0.0
3 1 2020-06-01 03 8048.808 17.1 3.2 91.0 0.0 0.0 0.0 0.0
4 1 2020-06-01 04 8043.624 17.0 3.3 92.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ...
122395 60 2020-08-24 19 4114.368 27.8 2.3 68.0 0.0 0.7 1.0 1.0
122396 60 2020-08-24 20 3975.696 27.3 1.2 71.0 0.0 0.0 1.0 1.0
122397 60 2020-08-24 21 3572.208 27.3 1.8 71.0 0.0 0.0 1.0 1.0
122398 60 2020-08-24 22 3299.184 27.1 1.8 74.0 0.0 0.0 1.0 1.0
122399 60 2020-08-24 23 3204.576 27.1 2.6 75.0 0.0 0.0 1.0 1.0

122400 rows × 10 columns

#test.shape 10080 X 9
#60개의 건물 X 7일 24시간 =10080
test
num date_time 기온(°C) 풍속(m/s) 습도(%) 강수량(mm, 6시간) 일조(hr, 3시간) 비전기냉방설비운영 태양광보유
0 1 2020-08-25 00 27.8 1.5 74.0 0.0 0.0 NaN NaN
1 1 2020-08-25 01 NaN NaN NaN NaN NaN NaN NaN
2 1 2020-08-25 02 NaN NaN NaN NaN NaN NaN NaN
3 1 2020-08-25 03 27.3 1.1 78.0 NaN 0.0 NaN NaN
4 1 2020-08-25 04 NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ...
10075 60 2020-08-31 19 NaN NaN NaN NaN NaN NaN NaN
10076 60 2020-08-31 20 NaN NaN NaN NaN NaN NaN NaN
10077 60 2020-08-31 21 27.9 4.1 68.0 NaN 0.0 1.0 1.0
10078 60 2020-08-31 22 NaN NaN NaN NaN NaN NaN NaN
10079 60 2020-08-31 23 NaN NaN NaN NaN NaN NaN NaN

10080 rows × 9 columns

딥러닝 모델(전력사용량만 변수로 사용)

##############전력사용량(kWh) 정규화####################################

mini=train.iloc[:,2].min()
size=train.iloc[:,2].max()-train.iloc[:,2].min()
train.iloc[:,2]=(train.iloc[:,2]-mini)/size
train
num date_time 전력사용량(kWh) 기온(°C) 풍속(m/s) 습도(%) 강수량(mm) 일조(hr) 비전기냉방설비운영 태양광보유
0 1 2020-06-01 00 0.461072 17.6 2.5 92.0 0.8 0.0 0.0 0.0
1 1 2020-06-01 01 0.458624 17.7 2.9 91.0 0.3 0.0 0.0 0.0
2 1 2020-06-01 02 0.457017 17.5 3.2 91.0 0.0 0.0 0.0 0.0
3 1 2020-06-01 03 0.453729 17.1 3.2 91.0 0.0 0.0 0.0 0.0
4 1 2020-06-01 04 0.453437 17.0 3.3 92.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ...
122395 60 2020-08-24 19 0.231936 27.8 2.3 68.0 0.0 0.7 1.0 1.0
122396 60 2020-08-24 20 0.224119 27.3 1.2 71.0 0.0 0.0 1.0 1.0
122397 60 2020-08-24 21 0.201373 27.3 1.8 71.0 0.0 0.0 1.0 1.0
122398 60 2020-08-24 22 0.185982 27.1 1.8 74.0 0.0 0.0 1.0 1.0
122399 60 2020-08-24 23 0.180649 27.1 2.6 75.0 0.0 0.0 1.0 1.0

122400 rows × 10 columns

####################################################

input_window =996 #임의의 수
output_window = 24 #168 7일 24시간
window = 12 #window는 12시간 마다는 12시간 마다
num_features = 1 #베이스라인은 feature를 하나만 사용했습니다.
num_power = 60
end_=168
lstm_units=32
dropout=0.2
EPOCH=30
BATCH_SIZE=128
#train을 tensor로 변경 (60, 24*85, 1)
train_x=tf.reshape(train.iloc[:,2].values, [num_power, 24*85, num_features])
print(f'train_x.shape:{train_x.shape}')
train_x.shape:(60, 2040, 1)
#train_window_x np.zeros를 만듬 (60, 85, 996, 1)
train_window_x= np.zeros(( train_x.shape[0], (train_x.shape[1]-(input_window + output_window))//window, input_window, num_features)) 
train_window_y= np.zeros(( train_x.shape[0], (train_x.shape[1]-(input_window + output_window))//window, output_window, num_features))
print(f'train_window_x.shape:{train_window_x.shape}')
print(f'train_window_y.shape:{train_window_y.shape}')
train_window_x.shape:(60, 85, 996, 1)
train_window_y.shape:(60, 85, 24, 1)
#train_window_x에 train값 채워넣기
for example in range(train_x.shape[0]):
    
    for start in range(0, train_x.shape[1]-(input_window+output_window), window):
        end=start+input_window
        train_window_x[example, start//window, :] = train_x[example, start: end               , :]
        train_window_y[example, start//window, :] = train_x[example, end  : end+ output_window, :]
#new_train_x, reshape통해 lstm에 알맞은 형태로 집어넣기
new_train_x=tf.reshape(train_window_x, [-1, input_window, num_features])
new_train_y=tf.reshape(train_window_y, [-1, output_window,num_features])
print(f'new_train_x.shape:{new_train_x.shape}')
print(f'new_train_y.shape:{new_train_y.shape}')
new_train_x.shape:(5100, 996, 1)
new_train_y.shape:(5100, 24, 1)
#####층 쌓기###########


model=Sequential([
LSTM(lstm_units, return_sequences=False, recurrent_dropout=dropout),
Dense(output_window * num_features, kernel_initializer=tf.initializers.zeros()), 
Reshape([output_window, num_features])
])
#######Compile 구성하기################


model.compile(optimizer='rmsprop', loss='mae', metrics=['mae'])
# 에포크가 끝날 때마다 점(.)을 출력해 훈련 진행 과정을 표시합니다
class PrintDot(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 10 == 0: print('')
        print('.', end='')

#가장 좋은 성능을 낸 val_loss가 적은 model만 남겨 놓았습니다.
save_best_only=tf.keras.callbacks.ModelCheckpoint(filepath="lstm_model.h5", monitor='val_loss', save_best_only=True)


early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

#검증 손실이 10epoch 동안 좋아지지 않으면 학습률을 0.1 배로 재구성하는 명령어입니다.
reduceLR = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10)
######################
model.fit(new_train_x, new_train_y, epochs=EPOCH, batch_size=BATCH_SIZE, validation_split = 0.2, verbose=0,
          callbacks=[PrintDot(), early_stop, save_best_only , reduceLR])

model.summary()
..........
..........
..........Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm (LSTM)                  (None, 32)                4352      
_________________________________________________________________
dense (Dense)                (None, 24)                792       
_________________________________________________________________
reshape (Reshape)            (None, 24, 1)             0         
=================================================================
Total params: 5,144
Trainable params: 5,144
Non-trainable params: 0
_________________________________________________________________
#######################
prediction=np.zeros((num_power, end_, num_features))
new_test_x=train_x

for i in range(end_//output_window):
    start_=i*output_window
    next_=model.predict(new_test_x[ : , -input_window:, :])
    new_test_x = tf.concat([new_test_x, next_], axis=1)
    print(new_test_x.shape)
    prediction[:, start_: start_ + output_window, :]= next_
prediction =prediction *size + mini
(60, 2064, 1)
(60, 2088, 1)
(60, 2112, 1)
(60, 2136, 1)
(60, 2160, 1)
(60, 2184, 1)
(60, 2208, 1)
submission['answer']=prediction.reshape([-1,1])
submission
num_date_time answer
0 1 2020-08-25 00 8946.525027
1 1 2020-08-25 01 8944.279289
2 1 2020-08-25 02 8936.898822
3 1 2020-08-25 03 8880.748026
4 1 2020-08-25 04 8911.488778
... ... ...
10075 60 2020-08-31 19 6455.899722
10076 60 2020-08-31 20 6050.066612
10077 60 2020-08-31 21 5599.296463
10078 60 2020-08-31 22 5429.713586
10079 60 2020-08-31 23 5538.318500

10080 rows × 2 columns

submission.to_csv('baseline_submission1.csv', index=False)